package main.java.study.pdf解析;

import net.sourceforge.tess4j.ITesseract;
import net.sourceforge.tess4j.Tesseract;
import net.sourceforge.tess4j.TesseractException;

import java.io.File;

/**
 * Command-line demo that runs Tesseract OCR (through Tess4J) on a single image
 * and prints the recognized text to standard output.
 *
 * <p>Usage: {@code ImageToText [imagePath [tessdataPath [language]]]}.
 * Every argument is optional; a missing or blank argument falls back to the
 * corresponding default constant, so running with no arguments uses the
 * built-in defaults.
 */
public class ImageToText {
    /** Image that is processed when no image path is given on the command line. */
    private static final String DEFAULT_IMAGE_PATH = "page_0.png";

    /** Directory holding the Tesseract language data ({@code tessdata}) used by default. */
    private static final String DEFAULT_TESSDATA_PATH = "E:\\Program Files\\Tesseract-OCR\\tessdata";

    /** Default recognition language: English. Use {@code "chi_sim"} for Simplified Chinese. */
    private static final String DEFAULT_LANGUAGE = "eng";

    /**
     * Recognizes the text in an image and prints it.
     *
     * @param args optional positional arguments: {@code [0]} image path,
     *             {@code [1]} tessdata directory, {@code [2]} language code
     */
    public static void main(String[] args) {
        String imagePath = argOrDefault(args, 0, DEFAULT_IMAGE_PATH);
        String tessdataPath = argOrDefault(args, 1, DEFAULT_TESSDATA_PATH);
        String language = argOrDefault(args, 2, DEFAULT_LANGUAGE);

        File imageFile = new File(imagePath);
        // Fail early with a clear message instead of a less specific OCR error.
        if (!imageFile.isFile()) {
            System.err.println("Image file not found or not a regular file: "
                    + imageFile.getAbsolutePath());
            return;
        }

        ITesseract tesseract = new Tesseract();
        // Point Tess4J at the directory that contains the *.traineddata language files.
        tesseract.setDatapath(tessdataPath);
        tesseract.setLanguage(language);

        try {
            // Run OCR on the image and print the recognized text.
            String text = tesseract.doOCR(imageFile);
            System.out.println("识别出的文字内容: " + text);
        } catch (TesseractException e) {
            // Add context about which input failed, then keep the full trace for diagnosis.
            System.err.println("OCR failed for " + imageFile.getAbsolutePath()
                    + " (tessdata=" + tessdataPath + ", language=" + language + ")");
            e.printStackTrace();
        }
    }

    /**
     * Returns the argument at {@code index}, or {@code fallback} when the array is
     * {@code null}, too short, or the argument is {@code null} or blank.
     */
    private static String argOrDefault(String[] args, int index, String fallback) {
        if (args == null || index >= args.length) {
            return fallback;
        }
        String value = args[index];
        if (value == null || value.trim().isEmpty()) {
            return fallback;
        }
        return value;
    }
}
